---
title: "Cleaning Perception and Acceptance of Religious Diversity among the European Population"
---

# Load

```{r}
# run the helper script (presumably attaches the packages used below -- confirm)
source("helper-packages.R")

# read the raw Stata file holding the survey responses
percep_euro_raw <- read_dta(
  "../raw-data/y-multi-perception-and-acceptance-of-religious-diversity/Perception and Acceptance of Religious Diversity among the European Population.DTA"
)
```

# Clean

```{r}
# interview window per country, keyed by `resp_country_common`;
# all five countries share the same start and end date
  percep_euro_dates <-
    tibble(
      resp_country_common = c(
        "Germany",
        "Denmark",
        "France",
        "Netherlands",
        "Portugal"
      ),
      # length-one dates are recycled across the five countries
      resp_interview_start_date = as.Date("2010-06-01", "%Y-%m-%d"),
      resp_interview_end_date = as.Date("2010-08-01", "%Y-%m-%d")
    )

# clean
# Builds the harmonised respondent-level table from `percep_euro_raw`:
# meta-data, demographics, ten social-distance items, general trust and
# religiosity. Interview start/end dates are joined in from
# `percep_euro_dates` by country, and only the `resp_*` columns are kept.
  clean_percep_euro <-
    percep_euro_raw %>%
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Perception and Acceptance of Religious Diversity among the European Population",
        
      # round number (character vector, title case)  
        resp_round = "",      
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3lWa5hX",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = 
          case_when(
            land == 1 ~ "in-person", # Germany
            land %in% c(2:5) ~ "phone"),

      # country (character vector; list of countries as written in original source)
      # derived from the numeric `land` code; this column is also the country
      # key used by the demographic recodes further down
        resp_country_original = 
          dplyr::recode(
            as.character(land),
              "1" = "Germany",
              "2" = "Denmark",
              "3" = "France",
              "4" = "Netherlands",
              "5" = "Portugal"),

      # country (character vector; converts to countrycode county.name list)
        resp_country_common = 
          countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
      # no per-respondent date is set here; the start/end dates come from the join below
        resp_interview_date = NA) %>% 
        left_join(
          percep_euro_dates, by = "resp_country_common") %>% 
        mutate( 
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's denomination (character vector that corresponds to master list)
      # each country has its own denomination item; pick the one matching the
      # respondent's country. `land` is deliberately NOT overwritten with its
      # labels here, so it keeps a single meaning throughout the pipeline.
      resp_denomination = 
        case_when(
          resp_country_original == "Denmark" ~ to_character(V12_DK),
          resp_country_original == "France" ~ to_character(V12_F),
          resp_country_original == "Germany" ~ to_character(V12),
          resp_country_original == "Netherlands" ~ to_character(V12_NL),
          resp_country_original == "Portugal" ~ to_character(V12_PT)),
      
      # respondent's religion (character vector that corresponds to master list)
      # denominations not listed below (including missing) become NA
      resp_religion = 
        case_when(
          
          resp_denomination %in% c(
            "Protestant Church (without Free Churches)",
            "Roman-Catholic Church",
            "Free Church",
            "Orthodox Church",
            "Other Christian Church",
            "The Danish People's Church",
            "The Roman Catholic Church",
            "Catholic",
            "Protestant",
            "Orthodox",
            "Evangelical Lutherans",
            "Roman Catholic",
            "Dutch Reformed (Nederlands Hervormd)",
            "Protestants\\PKN",
            "Other Christian denomination",
            "Reformed Churches (Gereformeed)",
            "Other Protestant denomination",
            "Liberal (Vrijzinnig)",
            "Other Christian church") ~ "Christian",
          
          resp_denomination %in% c(
            "Islam",
            "Muslim") ~ "Muslim",
          
          resp_denomination %in% c(
            "Other Non-Christian religion",
            "Other non-Christian religion",
            "Forn Sior (belief in Nordic gods)",
            "Other",
            "Other religions/churches",
            "Another non-Christian faith") ~ "Other religion",
          
          resp_denomination %in% c(
            "Jewish") ~ "Jewish",
          
          resp_denomination %in% c(
            "Buddhism") ~ "Buddhist",
          
          TRUE ~ NA_character_), 
    
      # respondent's age (character vector; bins denoted by single dash ["18-25"])
        resp_age = as.character(V102),
   
      # respondent's education level
      # Germany is coded from V103_D (level of education, only asked in Germany);
      # every other country is binned from V103 (years of education).
      # Rows matching no condition (e.g. missing V103/V103_D) stay NA.
        resp_education_original = # significant missingness present in original variables
          case_when(
            resp_country_original == "Germany" & V103_D == 1 ~ "1. I'm still a pupil [Primary]",
            resp_country_original == "Germany" & V103_D == 2 ~ "2. Left school without graduate certificate [Primary]",
            resp_country_original == "Germany" & V103_D == 3 ~ "3. Grade nine exam passed (Volks-Hauptschulabschluss) [Primary]",
            resp_country_original == "Germany" & V103_D == 4 ~ "4. Grade ten exam passed (Mittlere Reife, Realschulabschluss) [Primary]",
            resp_country_original == "Germany" & V103_D == 5 ~ "5. East Germany polytechnic school graduate, degree eight or nine (Polytechnische Oberschule mit Abschluss 8 oder 9 Klasse) [Primary]",
            resp_country_original == "Germany" & V103_D == 6 ~ "6. East Germany polytechnic school graduate, degree 10 (Polytechnische Oberschule mit Abschluss 10 Klasse) [Primary]",
            resp_country_original == "Germany" & V103_D == 7 ~ "7. Fachhochschulreife (Abschluss einer Fachoberschule etc) [Primary]",
            resp_country_original == "Germany" & V103_D == 8 ~ "8. University qualifying exam passed (Abitur (Hochschulreife) bzw. erweiterte Oberschule mit Abschl) [Primary]",
            resp_country_original == "Germany" & V103_D == 9 ~ "9. Technical college degree (Fachhochschulabschlus) [Primary]",
            resp_country_original == "Germany" & V103_D == 10 ~ "10. University degree (Universitatsabschluss, Hochschulabschluss) [College]",
            resp_country_original == "Germany" & V103_D == 11 ~ "11. Other educational certificate (anderer Schulabschluss) [Primary]",
            resp_country_original == "Germany" & V103_D == 99 ~ NA_character_,
            
            # case_when() uses the first matching condition, so the missing
            # codes must be tested before the numeric ranges; otherwise
            # 97/98/99 would be read as years of schooling
            resp_country_original != "Germany" & V103 %in% c(97, 98, 99) ~ NA_character_,
            resp_country_original != "Germany" & V103 < 5 ~ "<5 years of schooling [No education]",
            resp_country_original != "Germany" & V103 >= 5 & V103 < 15 ~ "Between 5 and 15 years of schooling [Primary]",
            resp_country_original != "Germany" & V103 >= 15 ~ ">=15 years of schooling [College]"),

      # respondent's gender (numeric: female = 1; male = 0; other = NA)
        resp_female = 
          case_when(
            V101 == 1 ~ 0,
            V101 == 2 ~ 1,
            TRUE ~ NA_real_),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
        resp_rural =
          case_when(
            rururb == 1 ~ 1,
            rururb == 2 ~ 0,
            TRUE ~ NA_real_),      
    
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: V7.1; QTEXT: What is your personal attitude toward members of the following religious groups? Christians.; ROPTIONS: 1 = Very positive [=0] + 2 = Somewhat positive [=0] + 3 = Somewhat negative [=1] + 4 =	Very negative [=1]; TARGET: Christian; TYPE: Favorability",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(V7_1),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode = 
          case_when(
            V7_1 %in% c(1:2) ~ 0,
            V7_1 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 2 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_2_qinfo = "NUM: V7.2; QTEXT: What is your personal attitude toward members of the following religious groups? Muslims.; ROPTIONS: 1 = Very positive [=0] + 2 =	Somewhat positive [=0] + 3 = Somewhat negative [=1] + 4 =	Very negative [=1]; TARGET: Muslim; TYPE: Favorability",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_2_original = 
          dplyr::recode(
            as.character(V7_2),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_2_bin_recode = 
          case_when(
            V7_2 %in% c(1:2) ~ 0,
            V7_2 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),    
    
    #########################  
    ### SOCIAL DISTANCE 3 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_3_qinfo = "NUM: V7.3; QTEXT: What is your personal attitude toward members of the following religious groups? Hindus.; ROPTIONS: 1 = Very positive [=0] + 2 = Somewhat positive [=0] + 3 = Somewhat negative [=1] + 4 =	Very negative [=1]; TARGET: Hindu; TYPE: Favorability",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_3_original = 
          dplyr::recode(
            as.character(V7_3),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_3_bin_recode = 
          case_when(
            V7_3 %in% c(1:2) ~ 0,
            V7_3 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 4 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_4_qinfo = "NUM: V7.4; QTEXT: What is your personal attitude toward members of the following religious groups? Buddhists.; ROPTIONS: 1 = Very positive [=0] + 2 = Somewhat positive [=0] + 3 = Somewhat negative [=1] + 4 =	Very negative [=1]; TARGET: Buddhist; TYPE: Favorability",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_4_original = 
          dplyr::recode(
            as.character(V7_4),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_4_bin_recode = 
          case_when(
            V7_4 %in% c(1:2) ~ 0,
            V7_4 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 5 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_5_qinfo = "NUM: V7.5; QTEXT: What is your personal attitude toward members of the following religious groups? Jews.; ROPTIONS: 1 = Very positive [=0] + 2 = Somewhat positive [=0] + 3 = Somewhat negative [=1] + 4 =	Very negative [=1]; TARGET: Jewish; TYPE: Favorability",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_5_original = 
          dplyr::recode(
            as.character(V7_5),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_5_bin_recode = 
          case_when(
            V7_5 %in% c(1:2) ~ 0,
            V7_5 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 6 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_6_qinfo = "NUM: V10.1; QTEXT: How pleasant do you find these contacts with Christians?; ROPTIONS: 1 = Very pleasant [=0] + 2 = Somewhat pleasant [=0] + 3 = Somewhat unpleasant [=1] + 4 = Very unpleasant [=1]; TARGET: Christian; TYPE: Discomfort",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_6_original = 
          dplyr::recode(
            as.character(V10_1),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_6_bin_recode = 
          case_when(
            V10_1 %in% c(1:2) ~ 0,
            V10_1 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),
  
    #########################  
    ### SOCIAL DISTANCE 7 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_7_qinfo = "NUM: V10.2; QTEXT: How pleasant do you find these contacts with Muslims?; ROPTIONS: 1 = Very pleasant [=0] + 2 = Somewhat pleasant [=0] + 3 = Somewhat unpleasant [=1] + 4 = Very unpleasant [=1]; TARGET: Muslim; TYPE: Discomfort",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_7_original = 
          dplyr::recode(
            as.character(V10_2),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_7_bin_recode = 
          case_when(
            V10_2 %in% c(1:2) ~ 0,
            V10_2 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),
  
    #########################  
    ### SOCIAL DISTANCE 8 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_8_qinfo = "NUM: V10.3; QTEXT: How pleasant do you find these contacts with Hindus?; ROPTIONS: 1 = Very pleasant [=0] + 2 = Somewhat pleasant [=0] + 3 = Somewhat unpleasant [=1] + 4 = Very unpleasant [=1]; TARGET: Hindu; TYPE: Discomfort",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_8_original = 
          dplyr::recode(
            as.character(V10_3),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_8_bin_recode = 
          case_when(
            V10_3 %in% c(1:2) ~ 0,
            V10_3 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),
  
    #########################  
    ### SOCIAL DISTANCE 9 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_9_qinfo = "NUM: V10.4; QTEXT: How pleasant do you find these contacts with Buddhists?; ROPTIONS: 1 = Very pleasant [=0] + 2 = Somewhat pleasant [=0] + 3 = Somewhat unpleasant [=1] + 4 = Very unpleasant [=1]; TARGET: Buddhist; TYPE: Discomfort",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_9_original = 
          dplyr::recode(
            as.character(V10_4),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_9_bin_recode = 
          case_when(
            V10_4 %in% c(1:2) ~ 0,
            V10_4 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),
  
    #########################  
    ### SOCIAL DISTANCE 10 ##  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_10_qinfo = "NUM: V10.5; QTEXT: How pleasant do you find these contacts with Jews?; ROPTIONS: 1 = Very pleasant [=0] + 2 = Somewhat pleasant [=0] + 3 = Somewhat unpleasant [=1] + 4 = Very unpleasant [=1]; TARGET: Jewish; TYPE: Discomfort",

      # original response (as character vector); codes 98 and 99 are set to NA
        resp_soc_dist_10_original = 
          dplyr::recode(
            as.character(V10_5),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_10_bin_recode = 
          case_when(
            V10_5 %in% c(1:2) ~ 0,
            V10_5 %in% c(3:4) ~ 1,
            TRUE ~ NA_real_),

    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: 9 [V11_1]; QTEXT: What is your opinion on the following statements? There are few people in this world you can trust.; ROPTIONS: 1(/2) = Agree strongly [=1] + (3/4/)5 = Disagree strongly [=0]",

      # original response (as character vector); codes 98 and 99 are set to NA,
      # matching the social-distance items above
        resp_gentrust_original = 
          dplyr::recode(
            as.character(V11_1),
            "98" = NA_character_,
            "99" = NA_character_),       

      # binary recode (numeric: 1 = agrees that few people can be trusted [codes 1-2];
      # 0 = codes 3-5; anything else = NA)
        resp_gentrust_bin_recode = 
          case_when(
            V11_1 %in% c(1:2) ~ 1,
            V11_1 %in% c(3:5) ~ 0,
            TRUE ~ NA_real_),            
    
    ###################
    ### RELIGIOSITY ###  
    ###################
    
      # original question number; question text; response options
        resp_religiosity_qinfo = "NUM: 91 [V14]; QTEXT: Which of the following statements describes you best?; ROPTIONS: 1 = I consider myself a religious and spiritual person + 2 = I consider myself a religious person, but not a spiritual person + 3 = I consider myself not a religious person, but as a spiritual person + 4 = I consider myself neither a religious nor a spiritual person",
    
      # original response (numeric code, kept as-is)
        resp_religiosity_original = as.numeric(V14),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # codes 1-2 ("religious") = 1; codes 3-4 ("not religious") = 0; any other code = NA
        resp_religiosity_recode = 
          case_when(
            resp_religiosity_original %in% c(1:2) ~ 1,
            resp_religiosity_original %in% c(3:4) ~ 0)
    
    ) %>% 
    select(starts_with("resp_"))
```

# Save

```{r}
# write the cleaned respondent table to disk as an RDS file
  saveRDS(
    object = clean_percep_euro,
    file = "../cleaned-data/y-8-multi-perception-and-acceptance-european-population.rds"
  )
```
